home *** CD-ROM | disk | FTP | other *** search
- /* National Institute of Standards and Technology (NIST)
- /* National Computer System Laboratory (NCSL)
- /* Office Systems Engineering (OSE) Group
- /* ********************************************************************
- /* D I S C L A I M E R
- /* (March 8, 1989)
- /*
- /* There is no warranty for the NIST NCSL OSE SGML parser and/or the NIST
- /* NCSL OSE SGML parser validation suite. If the SGML parser and/or
- /* validation suite is modified by someone else and passed on, NIST wants
- /* the parser's recipients to know that what they have is not what NIST
- /* distributed, so that any problems introduced by others will not
- /* reflect on our reputation.
- /*
- /* Policies
- /*
- /* 1. Anyone may copy and distribute verbatim copies of the SGML source
- /* code as received in any medium.
- /*
- /* 2. Anyone may modify your copy or copies of SGML parser source code or
- /* any portion of it, and copy and distribute such modifications provided
- /* that all modifications are clearly associated with the entity that
- /* performs the modifications.
- /*
- /* NO WARRANTY
- /* ===========
- /*
- /* NIST PROVIDES ABSOLUTELY NO WARRANTY. THE SGML PARSER AND VALIDATION
- /* SUITE ARE PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER
- /* EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- /* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
- /* THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS
- /* WITH YOU. SHOULD THE SGML PARSER OR VALIDATION SUITE PROVE DEFECTIVE,
- /* YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
- /*
- /* IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW WILL NIST BE LIABLE FOR
- /* DAMAGES, INCLUDING ANY LOST PROFITS, LOST MONIES, OR OTHER SPECIAL,
- /* INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR
- /* INABILITY TO USE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA
- /* BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY THIRD PARTIES OR A
- /* FAILURE OF THE PROGRAM TO OPERATE WITH PROGRAMS NOT DISTRIBUTED BY
- /* NIST) THE PROGRAM, EVEN IF YOU HAVE BEEN ADVISED OF THE POSSIBILITY OF
- /* SUCH DAMAGES, OR FOR ANY CLAIM BY ANY OTHER PARTY.
- */
-
- /************************************************************************/
- /* TITLE: SGML PARSER */
- /* SYSTEM: DTD PROCESSOR */
- /* SUBSYSTEM: */
- /* SOURCE FILE: DTDMS.C */
- /* AUTHOR: STEVE LINDEMAN */
- /* */
- /* DATE CREATED: */
- /* LAST MODIFIED: */
- /* */
- /* REVISIONS */
- /* WHEN WHO WHY */
- /************************************************************************/
- #include <stdio.h>
- #include <setjmp.h>
- #include <unistd.h>
-
- #include "qntyset.h"
- #include "dtd.h"
- #include "dtdfncs.h"
- #include "dtdglbl.h"
- #include "dtddefs.h"
/* ============================================================ */
/* == Looks for an SGML declaration ("<!SGML", matched       == */
/* == without regard to case) at the current input position. == */
/* == If it is there, input is consumed up to and including  == */
/* == the next MDC; if not, every character examined is      == */
/* == pushed back.  The whole body is compiled only when JJJ == */
/* == is defined; otherwise this function does nothing.      == */
/* ============================================================ */
void dogetsgml()
{
#ifdef JJJ
    int jarr[6];            /* int, not char: EOF must stay distinct from data bytes */
    char *comp = "<!SGML";
    int i, j, c;
    int fnd = TRUE;         /* assume true */

    for (i = 0; i < 6; i++) {
        jarr[i] = jgetc();
        if (jarr[i] == EOF) {
            fnd = FALSE;
            break;
        }
        if (mycompare(jarr[i], comp[i]) == FALSE) {
            fnd = FALSE;    /* lower case letters are also checked */
            break;
        }
    }

    if (fnd == FALSE) {
        /* jarr[i] is the character that broke the match; give back */
        /* it and everything before it, last one read first.        */
        for (j = i; j >= 0; j--) {
            jungetc(jarr[j]);
        }
    }
    else {
        /* skip the body of the declaration */
        do {
            c = jgetc();
        } while (c != EOF && c != MDC);

        /* c is only meaningful on this path, so the EOF test lives here */
        if (c == EOF)
            terminate(1,"EOF found while in SGML declaration");
    }
    return;
#endif
}
/* ============================================================ */
/* == Compares two characters for case INsensitive equality. == */
/* == Returns TRUE when c1 and c2 are the same character or  == */
/* == the same letter in different case, FALSE otherwise.    == */
/* == Only 'a'..'z' are folded, using their offset from      == */
/* == 'A'..'Z' (contiguous letters, as in ASCII).            == */
/* ============================================================ */

/* Fallbacks so this helper also builds without the project    */
/* headers; they are skipped when TRUE/FALSE are already       */
/* defined as macros.                                          */
#ifndef TRUE
#define TRUE 1
#endif
#ifndef FALSE
#define FALSE 0
#endif

int mycompare(c1,c2)
int c1,c2;
{
    /* bounds are inclusive so that 'a' and 'z' are folded too */
    if (c1 >= 'a' && c1 <= 'z')
        c1 -= 'a' - 'A';
    if (c2 >= 'a' && c2 <= 'z')
        c2 -= 'a' - 'A';

    return((c1 == c2) ? TRUE : FALSE);
}
-
-
- /*------------------------------------------------------*/
- /* G E T _ M A R K E D _ S E C T I O N */
- /* This routine processes a marked section. If */
- /* the section is an INCLUDE section, processing */
- /* is returned to gettoken, else the entire section */
- /* is processed and then control is returned. */
- /*------------------------------------------------------*/
- void get_marked_section()
- {
- register int inchar,statkey;
- int begnum_open;
- short moredata,close_read=FALSE;
- if (++num_open_ms > TAGLVL)
- syntxerr("Error: Number of open marked sections > TAGLVL");
-
- statkey = get_status_keyword();
- INPPS();
- if ((inchar=jgetc()) != '[')
- syntxerr("Error: DSO not found in marked section.");
-
- switch(statkey) {
- case MS_INCLUDE:
- break;
- case MS_RCDATA:
- syntxerr("RCDATA marked section not legal");
- case MS_CDATA:
- syntxerr("CDATA marked section not legal");
- case MS_IGNORE:
- begnum_open = num_open_ms-1; /* already incremented */
- moredata = TRUE;
- while(moredata && (inchar=jgetc())!=EOF)
- if (inchar=='<' && (inchar=jgetc())=='!' && (inchar=jgetc())=='[') {
- if (++num_open_ms > TAGLVL)
- syntxerr("Error: Number open marked sections > TAGLVL.");
- }
- else
- if (inchar==']' && (inchar=jgetc())==']' && (inchar=jgetc())==MARKUP_END) {
- if (--num_open_ms == begnum_open)
- moredata = FALSE;
- }
- else
- if ((char)inchar == EE)
- syntxerr("Error: Entity End found in IGNORE marked section.");
- close_read = TRUE;
- break;
- default:
- syntxerr("Software vault");
- break;
- }
- if (!close_read && statkey!=MS_INCLUDE && statkey!=MS_CDATA && statkey!=MS_RCDATA) {
- if ((inchar=jgetc())!=']' || (inchar=jgetc())!=']')
- syntxerr("Error: MDO not found in marked section.");
- if ((inchar=jgetc()) != MARKUP_END)
- syntxerr("Error: MDC not found in marked section.");
- }
- return;
- }
-
- /* ============================================ */
- /* == G E T S T A T U S K E Y W O R D == */
- /* == Get the Status Keyword. == */
- /* ============================================ */
- int get_status_keyword()
- {
- int retval ,j;
- char keyname[NAMELEN];
-
- retval = MS_INCLUDE; /* default status keyword */
-
- INPPS(); /* input the ps */
-
- while ((j=jgetc()) != '[' ) {
- jungetc(j);
- get_name(keyname);
- if (strncmp(keyname,"IGNORE ",NAMELEN) == 0)
- retval = (MS_IGNORE > retval ? MS_IGNORE : retval);
- else
- if (strncmp(keyname,"CDATA ",NAMELEN) == 0)
- retval = (MS_CDATA > retval ? MS_CDATA : retval);
- else
- if (strncmp(keyname,"RCDATA ",NAMELEN) == 0)
- retval = (MS_RCDATA > retval ? MS_RCDATA : retval);
- else
- if (strncmp(keyname,"INCLUDE ",NAMELEN) == 0)
- retval = (MS_INCLUDE > retval ? MS_INCLUDE : retval);
- else
- if (strncmp(keyname,"TEMP ",NAMELEN) != 0)
- syntxerr("Error Illegal status keyword in marked section");
- INPPS();
- }
- jungetc(j);
- return(retval);
- }
- /* ============================================ */
- /* == G E T _ N A M E == */
- /* == Get the name. == */
- /* ============================================ */
- void get_name(name)
- char *name;
- {
- int j, i;
- char *blank = name;
-
- for (i=0;i<NAMELEN+1;i++) /* blank out the name string */
- *blank++ = ' ';
-
- if (!isnmstrt(j=jgetc()))
- syntxerr("Invalid status keyword name in marked section");
- *name++ = TOUPPER(j); /* make it uppercase */
- ADDCHAR(j);
- i = 0;
- while (i < NAMELEN && isnmchar(j=jgetc())) {
- *name++ = TOUPPER(j);
- i++;
- ADDCHAR(j);
- }
- if (i >= NAMELEN)
- syntxerr("Status Keyword Name exceeds NAMELEN");
- jungetc(j); /* unget the last character read */
- }
-
-
- /*------------------------------------------------------*/
- /* G E T _ M S _ C L O S E S */
- /* This routine reads from 'indoc' as many */
- /* marked section closes as possible. */
- /*------------------------------------------------------*/
- void get_ms_closes()
- {
- int inchar;
- short more_ms_closes=TRUE;
-
- while(more_ms_closes && (inchar=jgetc())!=EOF) {
- if (inchar == ']')
- if ((inchar=jgetc()) == ']')
- if ((inchar=jgetc()) == MARKUP_END) {
- if (--num_open_ms == 0)
- more_ms_closes = FALSE;
- if ((char)(inchar=jgetc()) != EE)
- jungetc(inchar);
- }
- else {
- jungetc(inchar);
- jungetc(']');
- jungetc(']');
- more_ms_closes = FALSE;
- }
- else {
- jungetc(inchar);
- jungetc(']');
- more_ms_closes = FALSE;
- }
- else {
- jungetc(inchar);
- more_ms_closes = FALSE;
- }
- }
- return;
- }
- /* ============================================================ */
-